Notebook

View assignment here

Exercise 1

In [1]:

from __future__ import division
import numpy as np
import pdb

# Movie catalogue: entry i of this list names column i of the preference matrix.
movie_titles = [
    'Legally Blond',
    'Matrix',
    'Bourne Identity',
    'You’ve Got Mail',
    'The Devil Wears Prada',
    'The Dark Knight',
    'The Lord of the Rings',
]

# Preference matrix, one row per user and one column per movie.
# An entry is +1 or -1 where the user gave a rating and 0 where they gave none.
P = np.array([
    [ 0,  0, -1,  0, -1,  1,  1],  # User 1
    [-1,  1,  1, -1,  0,  1,  1],  # User 2
    [ 0,  1,  1,  0,  0, -1,  1],  # User 3
    [-1,  1,  1,  0,  0,  1,  1],  # User 4
    [ 0,  1,  1,  0,  0,  1,  1],  # User 5
    [ 1, -1,  1,  1,  1, -1,  0],  # User 6
    [-1,  1, -1,  0, -1,  0,  1],  # User 7
    [ 0, -1,  0,  1,  1, -1, -1],  # User 8
    [ 0,  0, -1,  1,  1,  0, -1],  # User 9
])

# Mask of rated entries: |P| is 1 wherever a rating exists and 0 otherwise.
C = np.abs(P)

print('Raw Preference Matrix:')
print(P)

Raw Preference Matrix:
[[ 0  0 -1  0 -1  1  1]
 [-1  1  1 -1  0  1  1]
 [ 0  1  1  0  0 -1  1]
 [-1  1  1  0  0  1  1]
 [ 0  1  1  0  0  1  1]
 [ 1 -1  1  1  1 -1  0]
 [-1  1 -1  0 -1  0  1]
 [ 0 -1  0  1  1 -1 -1]
 [ 0  0 -1  1  1  0 -1]]

In [2]:

# Hyper-parameters of the factorization
reg = 0.1                        # weight of the ridge (L2) penalty
f = 2                            # number of latent factors
m, n = P.shape[0], P.shape[1]    # m = rows (users) of P, n = columns (movies) of P

In [3]:

# Random initialization of the two factor matrices.
#   X is (m x f)
#   Y is (f x n)
# Seed NumPy's global RNG first: without a fixed seed every kernel restart
# draws different starting factors, so the results printed below could never
# be reproduced.
np.random.seed(0)
# rand() is uniform on [0, 1); 1 - 2*rand() maps it to (-1, 1], and the 0.1
# factor shrinks the starting values to (-0.1, 0.1].
X = 0.1 * (1 - 2*np.random.rand(m, f))
Y = 0.1 * (1 - 2*np.random.rand(f, n))

In [4]:

# Alternating Ridge Regression
# Fits P ~ X.Y on ALL entries of P (unrated zeros included) by alternately
# solving a ridge-regularized least-squares problem for X and then for Y.

# The regularization term does not change between iterations, so build it once.
ridge_term = reg * np.eye(f)

# range() instead of the Python-2-only xrange(): the cell now runs unchanged
# on both Python 2 and Python 3 (100 iterations is small enough either way).
for _ in range(100):
    # Least-squares keeping Y fixed: solve (Y Y^T + reg*I) X^T = Y P^T
    X = np.linalg.solve(
            np.dot(Y, Y.T) + ridge_term,
            np.dot(Y, P.T)
            ).T
    # Least-squares keeping X fixed: solve (X^T X + reg*I) Y = X^T P
    Y = np.linalg.solve(
            np.dot(X.T, X) + ridge_term,
            np.dot(X.T, P)
            )
print('Alternating Ridge Regression:')
print(np.dot(X,Y))
# C zeroes out the residual on unrated entries, so only rated ones count.
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))

Alternating Ridge Regression:
[[-0.47047868  0.42881288 -0.92322674 -0.40431625 -0.82818449  0.71377038
   0.35042258]
 [-0.57771631  1.11906024  0.94589649 -0.6138595  -0.41293394  0.72761475
   1.24992152]
 [-0.08329304  0.47515025  1.23776778 -0.15067507  0.26037126  0.02607104
   0.61430612]
 [-0.4797323   0.97615665  0.95005888 -0.51903623 -0.29509116  0.59242618
   1.10279904]
 [-0.37593946  0.83720164  0.99806064 -0.42105221 -0.1576005   0.44610297
   0.96413601]
 [ 0.67254752 -0.68837628  1.05514875  0.59290466  1.10703169 -1.00139291
  -0.60521603]
 [-0.56690331  0.63195467 -0.7079191  -0.51001528 -0.88042456  0.83110331
   0.58167835]
 [ 0.65941592 -0.96038474  0.03268862  0.63788069  0.79442036 -0.91013103
  -0.98826639]
 [ 0.32613592 -0.67760237 -0.69495382  0.35562611  0.186357   -0.39923578
  -0.76905741]]
Error for movies that users actually rated: 5.73

In [5]:

# Re-initialize the factor matrices (X is m x f, Y is f x n) so the next fit
# starts from small random values instead of the previous solution.
# Re-seeding here makes this cell give the same starting point every time it
# is run, no matter how many random numbers were drawn before it.
np.random.seed(0)
# Uniform values in (-0.1, 0.1]: rand() is in [0, 1), so 1 - 2*rand() is in (-1, 1].
X = 0.1 * (1 - 2*np.random.rand(m, f))
Y = 0.1 * (1 - 2*np.random.rand(f, n))

In [6]:

# Alternating Weighted Ridge Regression
# Same alternating scheme as plain ridge regression, but every squared
# residual is weighted by C, so entries with C == 0 (no rating) do not
# influence the fit. The weights differ per user / per movie, hence one small
# f x f system is solved for each row of X and each column of Y.

# The regularization term does not change between iterations, so build it once.
ridge_term = reg * np.eye(f)

# range() instead of the Python-2-only xrange() so the cell also runs on Python 3.
for _ in range(100):
    # Each user u has a different set of weights Cu (one weight per movie).
    for u, Cu in enumerate(C):
        # Y * Cu scales column j of Y by Cu[j]; this equals Y.diag(Cu)
        # without materializing the n x n diagonal matrix.
        Y_weighted = Y * Cu
        X[u] = np.linalg.solve(
                    np.dot(Y_weighted, Y.T) + ridge_term,
                    np.dot(Y_weighted, P[u])
                    )
    # Each movie i has a different set of weights Ci (one weight per user).
    for i, Ci in enumerate(C.T):
        # X.T * Ci scales column k of X.T by Ci[k]; this equals X^T.diag(Ci).
        Xt_weighted = X.T * Ci
        Y[:, i] = np.linalg.solve(
                    np.dot(Xt_weighted, X) + ridge_term,
                    np.dot(Xt_weighted, P[:, i])
                    )
print('Alternating Weighted Ridge Regression:')
print(np.dot(X,Y))
# C zeroes out the residual on unrated entries, so only rated ones count.
print('Error for movies that users actually rated: %.2f'%np.sum((C*(P - np.dot(X,Y)))**2))

Alternating Weighted Ridge Regression:
[[-0.8785934   0.75591055 -0.84723564 -0.90527554 -0.91301451  1.22456781
   0.7703408 ]
 [-0.96653212  1.12037936  0.83860294 -1.00626954 -1.00818432  0.72382856
   1.10750498]
 [-0.32540594  0.55731684  1.38658549 -0.34526083 -0.34179019 -0.14502803
   0.53505323]
 [-0.96804918  1.12194066  0.83870998 -1.00784188 -1.00976418  0.72539036
   1.10906571]
 [-0.95829148  1.11193016  0.83821615 -0.99772977 -0.99960303  0.71527644
   1.09905574]
 [ 0.92243741 -0.80265369  0.83420677  0.95077543  0.95869447 -1.26620706
  -0.81690604]
 [-1.02704309  0.8941689  -0.92578349 -1.05861242 -1.06741822  1.40873236
   0.90998839]
 [ 0.95137278 -0.98077431 -0.07728943  0.98609905  0.99077203 -0.97584608
  -0.98024936]
 [ 0.89692922 -1.06910816 -0.9585243   0.93486255  0.93596746 -0.60822952
  -1.05423328]]
Error for movies that users actually rated: 2.01

Exercise 2

In [7]:

# For every user, recommend the not-yet-rated movie with the highest
# predicted preference under the current factors X and Y.
not_C = abs(1 - C) # movies not rated: 1 where C == 0, 0 where C == 1
P_hat = np.dot(X, Y)  # predicted preference for every (user, movie) pair

# Hide already-rated movies from argmax by setting their score to -inf.
# Subtracting a fixed penalty (4*C) only works while predictions happen to
# span less than the penalty; -inf excludes rated entries unconditionally.
unrated = not_C.astype(bool)
candidate_scores = np.where(unrated, P_hat, -np.inf)
top_movie_id = np.argmax(candidate_scores, axis=1)

for u, tm_id in enumerate(top_movie_id):
    print('User %d liked %s'%(u+1, ', '.join([movie_titles[i] for i,p in enumerate(P[u]) if p == 1])))
    print('User %d disliked %s'%(u+1, ', '.join([movie_titles[i] for i,p in enumerate(P[u]) if p == -1])))
    if not unrated[u].any():
        # Every score in this row is -inf, so argmax is meaningless: there is
        # nothing left to recommend to this user.
        print('User %d has already rated every movie\n'%(u+1))
        continue
    print('For user %d the top movie is movie n.%d (%s) - predicted vote %.2f\n'%
          (u+1, tm_id+1, movie_titles[tm_id], P_hat[u,tm_id]))

User 1 liked The Dark Knight, The Lord of the Rings
User 1 disliked Bourne Identity, The Devil Wears Prada
For user 1 the top movie is movie n.2 (Matrix) - predicted vote 0.76

User 2 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
User 2 disliked Legally Blond, You’ve Got Mail
For user 2 the top movie is movie n.5 (The Devil Wears Prada) - predicted vote -1.01

User 3 liked Matrix, Bourne Identity, The Lord of the Rings
User 3 disliked The Dark Knight
For user 3 the top movie is movie n.1 (Legally Blond) - predicted vote -0.33

User 4 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
User 4 disliked Legally Blond
For user 4 the top movie is movie n.4 (You’ve Got Mail) - predicted vote -1.01

User 5 liked Matrix, Bourne Identity, The Dark Knight, The Lord of the Rings
User 5 disliked 
For user 5 the top movie is movie n.1 (Legally Blond) - predicted vote -0.96

User 6 liked Legally Blond, Bourne Identity, You’ve Got Mail, The Devil Wears Prada
User 6 disliked Matrix, The Dark Knight
For user 6 the top movie is movie n.7 (The Lord of the Rings) - predicted vote -0.82

User 7 liked Matrix, The Lord of the Rings
User 7 disliked Legally Blond, Bourne Identity, The Devil Wears Prada
For user 7 the top movie is movie n.6 (The Dark Knight) - predicted vote 1.41

User 8 liked You’ve Got Mail, The Devil Wears Prada
User 8 disliked Matrix, The Dark Knight, The Lord of the Rings
For user 8 the top movie is movie n.1 (Legally Blond) - predicted vote 0.95

User 9 liked You’ve Got Mail, The Devil Wears Prada
User 9 disliked Bourne Identity, The Lord of the Rings
For user 9 the top movie is movie n.1 (Legally Blond) - predicted vote 0.90